/*NOTE: This is model code to illustrate the logic used to generate the datasets for analysis in the SCARP pilot.
This code is not intended to run as is - due to Epic restrictions on the sharing of intellectual property all 
of the table and attribute names have been "masked" with names enclosed in square brackets if they were pulled 
from Clarity (EPIC).*/


/*****************************************************
PROJECT:  BRIDGE C2 
PILOT: SCRAP
DATE: 9/13/2022


PROJECT DESCRIPTION: Use ASCEND approach to compare BC, BC and 
BC cancer prevention services in patients with
	No SDH Screening
	Documented Need
	Documented No Need
Stratified by SDH need: Food, Housing, Transportation
STUDY PERIOD: 7/1/2016-2/29/2020		


DETAILS (This Syntax): pull independent variables for BC_cohort

INPUT DATA: 
SCRAP.BC_patients

OUTPUT DATA:
SCRAP.BC_exclusions_LU
SCRAP.BC_exclusions_detail
SCRAP.BC_first_exclusion
scrap.BC_patient_covariates


SECTIONS:
A. Pull exclusion dates and apply, update observation length;
B. Define SDH Screening status for food, housing and transportation insecurity
	No SDH Screening
	Documented Need
	Documented No Need
C. Pull demographics
	race_ethnicity: Hispanic, non-Hispanic Black, non-Hispanic white, Non-Hispanic other, no data
	Preferred language: English/non-English
	Age, primary payor at index
	FPL on or after Index
	Visits per year


*****************************************************/
options compress=yes reuse=yes;
libname SCRAP "e:\\sasroot\BRIDGE C2\SCRAP\";
libname RD odbc noprompt="dsn=research_dev;Trusted_Connection=yes" bulkload=yes;

/***************************************************************/

/***************************************************************/
/*A. Pull exclusion dates and apply, update observation length;*/
/***************************************************************/

options mprint symbolgen mlogic;
%let patients=scrap.SCRAP_BC_PATIENTS;/*location of your denominator file*/
%let idname=raw_patid;/*name of the id variable that links to Clarity pat_id*/
%let LU_output=scrap.BC_exclusion_LU;/*output file of exclusion codes*/
%let end_date='29FEB2020'd;
%let BC_exclusion_detail=SCRAP.BC_exclusion_detail;
%let first_BC_exclude=SCRAP.BC_First_exclusion;


/*get last_encounter before exclude_date*/
/*(observation period runs from index visit to last encounter before exclusion)*/
proc sql;
create table last_enc as
select x.*,
max(e.contact_date) as last_encounter format date9.
from scrap.BC_first_exclusion x
left join scrap.scrap_potential_cohort_pc_enc e
on x.raw_patid=e.raw_patid
where e.contact_date<x.exclude_date
group by x.raw_patid,x.exclude_date ;

proc sql;
create table dropped_pat as
select distinct raw_patid from scrap.BC_first_exclusion
where raw_patid ^in (select distinct raw_patid from last_enc);

*merge exclusions to patients;
proc sql;
create table BC_Pat as
select p.*,
x.exclude_date,
x.last_encounter as last_enc2,
d.sas_death_date format date9.
from SCRAP.SCRAP_BC_patients p
left join last_enc x
on p.raw_patid=x.raw_patid
left join ochin.death d on p.raw_patid=d.raw_patid
where p.raw_patid^in(select distinct raw_patid from dropped_pat);

proc freq data=BC_pat;
where last_enc>sas_death_date;
tables last_enc*sas_death_date/list;
run;*;

*truncate observation period at last_enc2;
data BC_pat;
set BC_pat;
format observation_end date9.;
where sas_death_date=. or sas_death_date>last_enc;
if last_enc2=. then observation_end=last_enc;
if exclude_date^=. or death_date^=. then do;
	if last_enc2<last_enc then
		observation_end=last_enc2;
	if last_enc2>=last_enc then
		observation_end=last_enc;
end;
observation_length=yrdif(index_visit, observation_end,'age');
run;

data BC_pat2;
set BC_pat;
if observation_length<1 and exclude_date^=1 then delete;
run;


/*****************************************************************************
B. Define SDH Screening status for food, housing and transportation insecurity
	No SDH Screening
	Documented Need
	Documented No Need
*******************************************************************************/
/*link patients to screens at their primary_clinic*/
proc sql;
create table pat_screens as
select 
p.raw_patid as raw_patid2,
p.delivery_site_id,
p.index_visit,
p.observation_end,
sdh.*,
domain='Housing Instability' as housing_screen,
domain='Food Insecurity' as food_screen,
domain='Transportation' as Transportation_screen,
sdh_positive_screen='1' as positive_screen
from BC_pat2 p
left join scrap.scrap_sdh_screening_3_domains sdh
on p.raw_patid=sdh.raw_patid and p.delivery_site_id=sdh.delivery_site_id
and sdh.screen_date between p.index_visit and p.observation_end
order by p.raw_patid, domain, screen_date;
quit;

*summarize observation period screens;
proc sql;
create table screen_summary as
select raw_patid2 as raw_patid,
delivery_site_id,
max(Food_screen) as screened_food,
max(food_screen*positive_screen) as pos_food,
max(Housing_screen) as screened_housing,
max(Housing_screen*positive_screen) as pos_housing,
max(Transportation_screen) as screened_transportation,
max(Transportation_screen*positive_screen) as pos_transportation
from pat_screens group by raw_patid2, delivery_site_id;

/*time to first screen*/
data first_screen (keep=raw_patid2 first_food first_housing first_transportation);
set pat_screens;
by raw_patid2 domain;
retain first_food first_housing first_transportation;
format first_food first_housing first_transportation date9.;
if first.raw_patid2 then do;
	first_food=.;
	first_housing=.;
	first_transportation=.;
end;
if first.raw_patid2 or first.domain then do;
	if domain='Food Insecurity' then first_food=screen_date;
	if domain='Housing Instability' then first_housing=screen_date;
	if domain='Transportation' then first_transportation=screen_date;
end;
if last.raw_patid2 then output;
run;

proc sql;
create table BC_pat3 as
select p.*,
s.screened_food,
f.first_food,
s.pos_food,
s.screened_housing,
f.first_housing,
s.pos_housing,
s.screened_transportation,
f.first_transportation,
s.pos_transportation
from BC_pat2 p left join screen_summary s
on p.raw_patid=s.raw_patid
left join first_screen f
on p.raw_patid=f.raw_patid2;
quit;


data BC_pat3;
set BC_pat3;
length SDH_food SDH_Housing SDH_Transportation $20;
time_to_food=first_food-index_visit;
time_to_housing=first_housing-index_visit;
time_to_transportation=first_transportation-index_visit;
if screened_food=0 then SDH_food='3_Never Screened';
else if screened_Food=1 then do;
	if pos_Food=0 then SDH_Food='2_Negative Screen';
	if pos_Food=1 then SDH_Food='1_Positive Screen';
end;
if screened_housing=0 then SDH_housing='3_Never Screened';
else if screened_housing=1 then do;
	if pos_housing=0 then SDH_housing='2_Negative Screen';
	if pos_housing=1 then SDH_housing='1_Positive Screen';
end;
if screened_transportation=0 then SDH_transportation='3_Never Screened';
else if screened_transportation=1 then do;
	if pos_transportation=0 then SDH_transportation='2_Negative Screen';
	if pos_transportation=1 then SDH_transportation='1_Positive Screen';
end;
run;

data scrap.temp_BC_pat3;
set BC_pat3;
run; 


/*****************************************************************************
C. Pull demographics and encounter variables
	race_ethnicity: Hispanic, non-Hispanic Black, non-Hispanic white, Non-Hispanic other, no data
	Preferred language: English/non-English
	age at index
	primary payor at index
	FPL on or after Index
	PC visits per year in observation period

*******************************************************************************/
/*demographics*/
proc sql;
create table BC_pat4 as
select p.*,
d.hispanic_description,
d.race_description,
d.pat_pref_language_spoken_desc
from BC_pat3 p left join ochin.demographic d
on p.raw_patid=d.raw_patid;


data BC_pat4;
set BC_pat4;
/*Note that ASCEND manuscript groups missing language with other*/
if pat_pref_language_spoken_desc='English'
	then language_c='English';
else if pat_pref_language_spoken_desc='Spanish, Castilian'
	then language_c='Spanish';
else if pat_pref_language_spoken_desc^ in('English', 'Spanish, Castilian') 
	then language_c='Other';
if Hispanic_description='No' then do;
	if race_description='White' then race_ethnic='Nonhispanic White';
	if race_description='Black or African American' then race_ethnic='Nonhispanic Black';
	if race_description ^in('White','No information','Black or African American', 'Unknown','Refuse to answer')
		then race_ethnic='Nonhispanic Other';
end;
if Hispanic_description='Yes' then race_ethnic='Hispanic';
if race_ethnic=' ' then race_ethnic='Unknown';
run;


/*PC encounters*/
proc sql;
create table pc_enc as
select c.raw_patid,
count(distinct e.encounterid) as PC_encounters
from BC_pat4 c left join 
scrap.scrap_potential_cohort_pc_enc e
on c.raw_patid=e.raw_patid and
	e.contact_date between c.index_visit and c.observation_end
group by c.raw_patid;
create table BC_pat5 as
select c.*,
e.PC_encounters,
round(e.PC_encounters/observation_length,1) as yearly_visits
from BC_pat4 c left join pc_enc e
on c.raw_patid=e.raw_patid;

 
data BC_pat5;
set BC_pat5;
if yearly_visits<3 then visit_cat='1-2';
if yearly_visits>=3 and yearly_visits<5 then visit_cat='3-4';
if yearly_visits>=5 and yearly_visits<7 then visit_cat='5-6';
if yearly_visits>=7 then visit_cat='7+';
run;


/*index visit variables*/
proc sql;
create table encounters as
select p.raw_patid,
p.index_visit,
e.contact_date,
e.age_at_encounter_date,
e.FPL_percentage,
e.payor_type_research
from BC_pat5 p left join scrap.scrap_potential_cohort_pc_enc e
on p.raw_patid=e.raw_patid and
e.contact_date>=p.index_visit
order by p.raw_patid, e.contact_date;


data index;
set encounters;
by raw_patid;
if first.raw_patid then output;
run;
data first_fpl;
set encounters;
where fpl_percentage^=.;
by raw_patid;
if first.raw_patid then output;
run;

proc sql;
create table BC_pat6 as
select p.*,
i.age_at_encounter_date,
i.payor_type_research,
f.fpl_percentage
from BC_pat5 p left join index i
on p.raw_patid=i.raw_patid
left join first_fpl f on 
p.raw_patid=f.raw_patid;

proc freq data=BC_pat6;
tables payor_type_research
age_at_encounter_date
fpl_percentage/missing;
run;

data scrap.BC_patient_covariates;
set BC_pat6;
if fpl_percentage=. then FPL_cat='No data';
if fpl_percentage^=. and FPL_percentage<=100 then FPL_cat='<=100';
if fpl_percentage>100 and FPL_percentage<=200 then FPL_cat='<=200';
if fpl_percentage>200 then FPL_cat='>200';
if payor_type_research=' ' then payor_type_research='Uninsured';
if age_at_encounter_date<40 then age_cat='23-39';
if age_at_encounter_date>=40 and age_at_encounter_date<50 then age_cat='40-49';
if age_at_encounter_date>=50 and age_at_encounter_date<65 then age_cat='50-64';
if age_at_encounter_date>=65 then age_cat='65-73';
run;


data SCRAP.BC_patient_covariates;
set SCRAP.BC_patient_covariates;
english=pat_pref_language_spoken_desc='English';
rename age_at_encounter_date=age_at_index_visit;
run; 

data SCRAP.BC_patient_covariates;
set SCRAP.BC_patient_covariates;
length obs_cat $10;
if years_observation<2 then obs_cat='<2';
if years_observation>=2 then obs_cat='2-3.6';
run; 
